***********************************************************************
******
****** AKMP_Ambiguity_CreateVars.do
******
****** Prepares the DHS ambiguity dataset for analysis and provides basic summary stats.
****** Prepares ambiguity and risk variables, as well as socio-demographic vars
****** available in the original dataset. 
******
****** CURRENT VERSION:
****** Created  on 28 May 2018; by Roy Kouwenberg and Kanin Anantanasuwong
****** Modified on 10 Jul 2018; by Roy Kouwenberg 
****** Modified on  7 Feb 2024; by Roy Kouwenberg (cleaned up for replication code)
****** 
******
****** Input files  : CL_RiskAmbiguity_6p.dta,     the raw data from our Risk & Ambiguity survey module
******				  Selectie_RiskAmbiguityp.dta, datafile indicating which CSS/DHS respondents were selected for our Risk & Ambiguity survey module
****** 				  wealth2018_summary.dta,      the decoded 2018 DHS household wealth data
******
****** Output files : AKMP_Ambiguity_MainData.dta,  the main dataset for our article
****** 				  AKMP_Ambiguity_PanelData.dta, the dataset for our article in panel format
******

***********************************************************************
****** Set the working directory path and file names
***********************************************************************

** Set the working directory.
** NOTE: this is a machine-specific absolute path; edit it to point at the
** folder that holds the input .dta files before running the do-file.
cd "D:\Mydocs\Papers\AKMP\Replication\Data"

** Check that "Selectie_RiskAmbiguityp.dta" (the CentERdata datafile with information
** on the panel members selected for the survey) is present in the working directory;
** per the original note it is merged in later.
** -confirm file- stops the do-file with an error when the file is absent, without
** reading the whole dataset into memory only to discard it on the next -use-.
confirm file "Selectie_RiskAmbiguityp.dta"

** Open "CL_RiskAmbiguity_6p.dta", the CentERdata datafile for the Risk & Ambiguity module
use "CL_RiskAmbiguity_6p.dta", clear

** Filename for the processed main data file (regular format)
global fn_main "AKMP_Ambiguity_MainData.dta"

** Filename for the processed panel data (panel format)
global fn_panel "AKMP_Ambiguity_PanelData.dta"

***********************************************************************
****** Process the ambiguity questions data: practice question about temperature in Amsterdam
***********************************************************************

* Matching probability for the AMBIGUITY_PRACTICE choice list (15 rows).
* For row k = 2..15, m_ambp is set to the k-th entry of the list below whenever the
* answer in row k is larger than the answer in row k-1. Rows are processed in
* ascending order, so a later row overwrites an earlier one.
* Afterwards: m_ambp = 1 when row 15 equals 1, and m_ambp = 0 when row 1 equals 2.
* NOTE(review): Stata ranks missing above every number, so a missing answer that
* follows a non-missing one also satisfies the ">" test - confirm that the choice
* lists contain no partially missing rows.
local mp_rows 0.0125 0.0375 0.075 0.15 0.25 0.35 0.45 0.55 0.65 0.75 0.85 0.925 0.9625 0.9875
generate m_ambp = .
forvalues row = 2/15 {
    local prev = `row' - 1
    local p : word `prev' of `mp_rows'
    replace m_ambp = `p' if AMBIGUITY_PRACTICE_`row' > AMBIGUITY_PRACTICE_`prev'
}
replace m_ambp = 1 if AMBIGUITY_PRACTICE_15 == 1
replace m_ambp = 0 if AMBIGUITY_PRACTICE_1 == 2
label variable m_ambp "Matching probability AMBIGUITY_PRACTICE"
summarize m_ambp
tabulate m_ambp

* Dummy for irrational responses (always A, or always B) on AMBIGUITY_PRACTICE.
*   ir_ambp = 1 when row 1 equals 2 or row 15 equals 1,
*   ir_ambp = . when row 1 or row 15 is missing (and neither condition above holds),
*   ir_ambp = 0 otherwise.
* The variable is initialised to 0 first: "gen ir_ambp = . if ..." would leave every
* observation missing, so the dummy could never take the value 0. This mirrors the
* construction of the ir_aex* dummies.
generate ir_ambp = 0
replace ir_ambp = . if AMBIGUITY_PRACTICE_1 == . | AMBIGUITY_PRACTICE_15 == .
replace ir_ambp = 1 if AMBIGUITY_PRACTICE_1 == 2
replace ir_ambp = 1 if AMBIGUITY_PRACTICE_15 == 1
label variable ir_ambp "Dummy irrational responses AMBIGUITY_PRACTICE"
summ ir_ambp
tab ir_ambp



***********************************************************************
****** Process ambiguity questions data: AEX stock index
***********************************************************************

* Matching probabilities m_aex1 ... m_aex6 for the 6 AEX ambiguity questions.
* For each question and each row k = 2..15, the k-th list entry below is assigned when
* the answer in row k exceeds the answer in row k-1 (later rows overwrite earlier ones).
* Then m = 1 when row 15 equals 1, and m = 0 when row 1 equals 2.
* NOTE(review): missing counts as larger than any number in Stata, so a missing row
* after a non-missing row also triggers the ">" test - confirm no partial missings.
local mp_aex 0.0125 0.0375 0.075 0.15 0.25 0.35 0.45 0.55 0.65 0.75 0.85 0.925 0.9625 0.9875
forvalues q = 1/6 {
    generate m_aex`q' = .
    forvalues row = 2/15 {
        local prev = `row' - 1
        local p : word `prev' of `mp_aex'
        replace m_aex`q' = `p' if AEX`q'_`row' > AEX`q'_`prev'
    }
    replace m_aex`q' = 1 if AEX`q'_15 == 1
    replace m_aex`q' = 0 if AEX`q'_1 == 2
    label variable m_aex`q' "Matching probability AEX`q'"
}
summarize m_aex1 m_aex2 m_aex3 m_aex4 m_aex5 m_aex6

* Dummies for mistakes (irrational answers) on the AEX ambiguity questions.
* A mistake is always Choice A or always Choice B: row 1 equals 2, or row 15 equals 1.
* The dummy is missing when row 1 or row 15 is missing and no mistake is flagged.
forvalues q = 1/6 {
    generate ir_aex`q' = 0
    replace ir_aex`q' = . if AEX`q'_1 == . | AEX`q'_15 == .
    replace ir_aex`q' = 1 if AEX`q'_1 == 2 | AEX`q'_15 == 1
    label variable ir_aex`q' "Dummy irrational responses AEX`q'"
}
summarize ir_aex1 ir_aex2 ir_aex3 ir_aex4 ir_aex5 ir_aex6

* nir_aex: number of irrational responses over the 6 questions
* (missing as soon as one of the six dummies is missing)
generate nir_aex = 0
forvalues q = 1/6 {
    replace nir_aex = nir_aex + ir_aex`q'
}
label variable nir_aex "Number of irrational responses to 6 AEX ambiguity questions"
summarize nir_aex
tabulate nir_aex

* dir_aex: 1 when there is at least one irrational response, 0 when none,
* missing when nir_aex is missing
generate dir_aex = (nir_aex > 0) if nir_aex < .
label variable dir_aex "Dummy for 1 or more irrational responses to 6 AEX ambiguity questions"
summarize dir_aex
tabulate dir_aex nir_aex

* dsamerow_aex: 1 when all six AEX matching probabilities are equal (same switching
* row on every question), 0 otherwise; defined only when m_aex1 and m_aex6 are
* both non-missing.
local same_aex "m_aex1 == m_aex2 & m_aex2 == m_aex3 & m_aex3 == m_aex4 & m_aex4 == m_aex5 & m_aex5 == m_aex6"
summarize m_aex1 m_aex2 m_aex3 m_aex4 m_aex5 m_aex6 if `same_aex'
generate dsamerow_aex = (`same_aex') if m_aex1 < . & m_aex6 < .
label variable dsamerow_aex "Dummy = 1 for switching on the same row for all 6 AEX ambiguity questions"
summarize dsamerow_aex
* Compare the matching probabilities of same-row switchers against the rest
summarize dsamerow_aex m_aex1 m_aex2 m_aex3 m_aex4 m_aex5 m_aex6 if dsamerow_aex == 1
summarize dsamerow_aex m_aex1 m_aex2 m_aex3 m_aex4 m_aex5 m_aex6 if dsamerow_aex == 0



***********************************************************************
****** Process ambiguity questions data: MSCI World stock index
***********************************************************************

* Matching probabilities m_msci1 ... m_msci6 for the 6 MSCI ambiguity questions.
* For each question and each row k = 2..15, the k-th list entry below is assigned when
* the answer in row k exceeds the answer in row k-1 (later rows overwrite earlier ones).
* Then m = 1 when row 15 equals 1, and m = 0 when row 1 equals 2.
* NOTE(review): missing counts as larger than any number in Stata, so a missing row
* after a non-missing row also triggers the ">" test - confirm no partial missings.
local mp_msci 0.0125 0.0375 0.075 0.15 0.25 0.35 0.45 0.55 0.65 0.75 0.85 0.925 0.9625 0.9875
forvalues q = 1/6 {
    generate m_msci`q' = .
    forvalues row = 2/15 {
        local prev = `row' - 1
        local p : word `prev' of `mp_msci'
        replace m_msci`q' = `p' if MSCI`q'_`row' > MSCI`q'_`prev'
    }
    replace m_msci`q' = 1 if MSCI`q'_15 == 1
    replace m_msci`q' = 0 if MSCI`q'_1 == 2
    label variable m_msci`q' "Matching probability MSCI`q'"
}
summarize m_msci1 m_msci2 m_msci3 m_msci4 m_msci5 m_msci6

* Dummies for mistakes (irrational answers) on the MSCI ambiguity questions.
* A mistake is always Choice A or always Choice B: row 1 equals 2, or row 15 equals 1.
* The dummy is missing when row 1 or row 15 is missing and no mistake is flagged.
forvalues q = 1/6 {
    generate ir_msci`q' = 0
    replace ir_msci`q' = . if MSCI`q'_1 == . | MSCI`q'_15 == .
    replace ir_msci`q' = 1 if MSCI`q'_1 == 2 | MSCI`q'_15 == 1
    label variable ir_msci`q' "Dummy irrational responses MSCI`q'"
}
summarize ir_msci1 ir_msci2 ir_msci3 ir_msci4 ir_msci5 ir_msci6

* nir_msci: number of irrational responses over the 6 questions
* (missing as soon as one of the six dummies is missing)
generate nir_msci = 0
forvalues q = 1/6 {
    replace nir_msci = nir_msci + ir_msci`q'
}
label variable nir_msci "Number of irrational responses to 6 MSCI ambiguity questions"
summarize nir_msci
tabulate nir_msci

* dir_msci: 1 when there is at least one irrational response, 0 when none,
* missing when nir_msci is missing
generate dir_msci = (nir_msci > 0) if nir_msci < .
label variable dir_msci "Dummy for 1 or more irrational responses to 6 MSCI ambiguity questions"
summarize dir_msci
tabulate dir_msci nir_msci

* dsamerow_msci: 1 when all six MSCI matching probabilities are equal (same switching
* row on every question), 0 otherwise; defined only when m_msci1 and m_msci6 are
* both non-missing.
local same_msci "m_msci1 == m_msci2 & m_msci2 == m_msci3 & m_msci3 == m_msci4 & m_msci4 == m_msci5 & m_msci5 == m_msci6"
summarize m_msci1 m_msci2 m_msci3 m_msci4 m_msci5 m_msci6 if `same_msci'
generate dsamerow_msci = (`same_msci') if m_msci1 < . & m_msci6 < .
label variable dsamerow_msci "Dummy = 1 for switching on the same row for all 6 MSCI ambiguity questions"
summarize dsamerow_msci
* Compare the matching probabilities of same-row switchers against the rest
summarize dsamerow_msci m_msci1 m_msci2 m_msci3 m_msci4 m_msci5 m_msci6 if dsamerow_msci == 1
summarize dsamerow_msci m_msci1 m_msci2 m_msci3 m_msci4 m_msci5 m_msci6 if dsamerow_msci == 0


***********************************************************************
****** Process ambiguity questions data: familiar company stock
***********************************************************************

* Matching probabilities m_stock1 ... m_stock6 for the 6 STOCK ambiguity questions.
* For each question and each row k = 2..15, the k-th list entry below is assigned when
* the answer in row k exceeds the answer in row k-1 (later rows overwrite earlier ones).
* Then m = 1 when row 15 equals 1, and m = 0 when row 1 equals 2.
* NOTE(review): missing counts as larger than any number in Stata, so a missing row
* after a non-missing row also triggers the ">" test - confirm no partial missings.
local mp_stock 0.0125 0.0375 0.075 0.15 0.25 0.35 0.45 0.55 0.65 0.75 0.85 0.925 0.9625 0.9875
forvalues q = 1/6 {
    generate m_stock`q' = .
    forvalues row = 2/15 {
        local prev = `row' - 1
        local p : word `prev' of `mp_stock'
        replace m_stock`q' = `p' if STOCK`q'_`row' > STOCK`q'_`prev'
    }
    replace m_stock`q' = 1 if STOCK`q'_15 == 1
    replace m_stock`q' = 0 if STOCK`q'_1 == 2
    label variable m_stock`q' "Matching probability STOCK`q'"
}
summarize m_stock1 m_stock2 m_stock3 m_stock4 m_stock5 m_stock6

* Dummies for mistakes (irrational answers) on the STOCK ambiguity questions.
* A mistake is always Choice A or always Choice B: row 1 equals 2, or row 15 equals 1.
* The dummy is missing when row 1 or row 15 is missing and no mistake is flagged.
forvalues q = 1/6 {
    generate ir_stock`q' = 0
    replace ir_stock`q' = . if STOCK`q'_1 == . | STOCK`q'_15 == .
    replace ir_stock`q' = 1 if STOCK`q'_1 == 2 | STOCK`q'_15 == 1
    label variable ir_stock`q' "Dummy irrational responses STOCK`q'"
}
summarize ir_stock1 ir_stock2 ir_stock3 ir_stock4 ir_stock5 ir_stock6

* nir_stock: number of irrational responses over the 6 questions
* (missing as soon as one of the six dummies is missing)
generate nir_stock = 0
forvalues q = 1/6 {
    replace nir_stock = nir_stock + ir_stock`q'
}
label variable nir_stock "Number of irrational responses to 6 STOCK ambiguity questions"
summarize nir_stock
tabulate nir_stock

* dir_stock: 1 when there is at least one irrational response, 0 when none,
* missing when nir_stock is missing
generate dir_stock = (nir_stock > 0) if nir_stock < .
label variable dir_stock "Dummy for 1 or more irrational responses to 6 STOCK ambiguity questions"
summarize dir_stock
tabulate dir_stock nir_stock

* dsamerow_stock: 1 when all six STOCK matching probabilities are equal (same switching
* row on every question), 0 otherwise; defined only when m_stock1 and m_stock6 are
* both non-missing.
local same_stock "m_stock1 == m_stock2 & m_stock2 == m_stock3 & m_stock3 == m_stock4 & m_stock4 == m_stock5 & m_stock5 == m_stock6"
summarize m_stock1 m_stock2 m_stock3 m_stock4 m_stock5 m_stock6 if `same_stock'
generate dsamerow_stock = (`same_stock') if m_stock1 < . & m_stock6 < .
label variable dsamerow_stock "Dummy = 1 for switching on the same row for all 6 STOCK ambiguity questions"
summarize dsamerow_stock
* Compare the matching probabilities of same-row switchers against the rest
summarize dsamerow_stock m_stock1 m_stock2 m_stock3 m_stock4 m_stock5 m_stock6 if dsamerow_stock == 1
summarize dsamerow_stock m_stock1 m_stock2 m_stock3 m_stock4 m_stock5 m_stock6 if dsamerow_stock == 0


***********************************************************************
****** Process ambiguity questions data: Bitcoin
***********************************************************************

* Matching probabilities m_bitcoin1 ... m_bitcoin6 for the 6 BITCOIN ambiguity questions.
* For each question and each row k = 2..15, the k-th list entry below is assigned when
* the answer in row k exceeds the answer in row k-1 (later rows overwrite earlier ones).
* Then m = 1 when row 15 equals 1, and m = 0 when row 1 equals 2.
* NOTE(review): missing counts as larger than any number in Stata, so a missing row
* after a non-missing row also triggers the ">" test - confirm no partial missings.
local mp_bitcoin 0.0125 0.0375 0.075 0.15 0.25 0.35 0.45 0.55 0.65 0.75 0.85 0.925 0.9625 0.9875
forvalues q = 1/6 {
    generate m_bitcoin`q' = .
    forvalues row = 2/15 {
        local prev = `row' - 1
        local p : word `prev' of `mp_bitcoin'
        replace m_bitcoin`q' = `p' if BITCOIN`q'_`row' > BITCOIN`q'_`prev'
    }
    replace m_bitcoin`q' = 1 if BITCOIN`q'_15 == 1
    replace m_bitcoin`q' = 0 if BITCOIN`q'_1 == 2
    label variable m_bitcoin`q' "Matching probability BITCOIN`q'"
}
summarize m_bitcoin1 m_bitcoin2 m_bitcoin3 m_bitcoin4 m_bitcoin5 m_bitcoin6

* Dummies for mistakes (irrational answers) on the BITCOIN ambiguity questions.
* A mistake is always Choice A or always Choice B: row 1 equals 2, or row 15 equals 1.
* The dummy is missing when row 1 or row 15 is missing and no mistake is flagged.
forvalues q = 1/6 {
    generate ir_bitcoin`q' = 0
    replace ir_bitcoin`q' = . if BITCOIN`q'_1 == . | BITCOIN`q'_15 == .
    replace ir_bitcoin`q' = 1 if BITCOIN`q'_1 == 2 | BITCOIN`q'_15 == 1
    label variable ir_bitcoin`q' "Dummy irrational responses BITCOIN`q'"
}
summarize ir_bitcoin1 ir_bitcoin2 ir_bitcoin3 ir_bitcoin4 ir_bitcoin5 ir_bitcoin6

* nir_bitcoin: number of irrational responses over the 6 questions
* (missing as soon as one of the six dummies is missing)
generate nir_bitcoin = 0
forvalues q = 1/6 {
    replace nir_bitcoin = nir_bitcoin + ir_bitcoin`q'
}
label variable nir_bitcoin "Number of irrational responses to 6 BITCOIN ambiguity questions"
summarize nir_bitcoin
tabulate nir_bitcoin

* dir_bitcoin: 1 when there is at least one irrational response, 0 when none,
* missing when nir_bitcoin is missing
generate dir_bitcoin = (nir_bitcoin > 0) if nir_bitcoin < .
label variable dir_bitcoin "Dummy for 1 or more irrational responses to 6 BITCOIN ambiguity questions"
summarize dir_bitcoin
tabulate dir_bitcoin nir_bitcoin

* dsamerow_bitcoin: 1 when all six BITCOIN matching probabilities are equal (same
* switching row on every question), 0 otherwise; defined only when m_bitcoin1 and
* m_bitcoin6 are both non-missing.
local same_bitcoin "m_bitcoin1 == m_bitcoin2 & m_bitcoin2 == m_bitcoin3 & m_bitcoin3 == m_bitcoin4 & m_bitcoin4 == m_bitcoin5 & m_bitcoin5 == m_bitcoin6"
summarize m_bitcoin1 m_bitcoin2 m_bitcoin3 m_bitcoin4 m_bitcoin5 m_bitcoin6 if `same_bitcoin'
generate dsamerow_bitcoin = (`same_bitcoin') if m_bitcoin1 < . & m_bitcoin6 < .
label variable dsamerow_bitcoin "Dummy = 1 for switching on the same row for all 6 BITCOIN ambiguity questions"
summarize dsamerow_bitcoin
* Compare the matching probabilities of same-row switchers against the rest
summarize dsamerow_bitcoin m_bitcoin1 m_bitcoin2 m_bitcoin3 m_bitcoin4 m_bitcoin5 m_bitcoin6 if dsamerow_bitcoin == 1
summarize dsamerow_bitcoin m_bitcoin1 m_bitcoin2 m_bitcoin3 m_bitcoin4 m_bitcoin5 m_bitcoin6 if dsamerow_bitcoin == 0



****************************************************************************
**** Counters for irrational (dominated) answers and same-row switching 
****************************************************************************

* nir_tot: total number of dominated answers over the 4 sources (6 questions each,
* so the range is 0-24); missing when any of the four source counts is missing
generate nir_tot = 0
foreach src in aex msci stock bitcoin {
    replace nir_tot = nir_tot + nir_`src'
}
label variable nir_tot "Number of dominated answers for the 4 sources, out of 24 possible"
summarize nir_tot
tabulate nir_tot

* ns_samerow: number of sources (out of 4) where the respondent switched on the same
* row in all choice lists; missing when any of the four same-row dummies is missing
* note: dsamerow_aex = 1 implies a_aex = 1, i.e. no distinction at all between
*       the 6 ambiguous events
generate ns_samerow = 0
foreach src in aex msci stock bitcoin {
    replace ns_samerow = ns_samerow + dsamerow_`src'
}
label variable ns_samerow "Number of sources with same-row switching (range: 0-4)"
summarize ns_samerow
tabulate ns_samerow


****************************************************************************
**** Dummy for spending at least 10 min. answering the survey module questions
****************************************************************************

* duur: number of seconds spent answering the survey module
* dge10min: 1 when duur is at least 600 seconds (10 minutes), 0 when shorter,
*           missing when duur is missing
generate dge10min = (duur >= 600) if duur < .
label variable dge10min "Dummy for spending at least 10 minutes to answer the survey questions"
tabulate dge10min, missing


*********************************************************************
**** Dummy for incorrect familiar stock names mentioned by respondents
*********************************************************************

* Dummy for respondents who mentioned an incorrect stock name when asked to name a
* familiar individual stock for the STOCK question (such as mutual funds, bitcoin,
* "none", etc.).
*   dstock_invalid = 1 for the invalid responses listed below,
*   dstock_invalid = . when no name was entered,
*   dstock_invalid = 0 otherwise.
*
* List of invalid responses, as of 6 July 2018 (screened by Roy):
* 1. ASN milieu en waterfonds
* 2. Ben niet bekend met aandelen   ("not familiar with stocks")
* 3. Bitcoin
* 4. Geen                           ("none")
* 5. Meewind
* 6. Rabo
* 7. Triodos cultuurfonds
* 8. bnp parisbas fortis obam
* 9. certificaat Triodosbank
* 10. geen                          ("none")
* 11. geen enkel aandeel            ("not a single stock")
* 12. hollandsbezit
* 13. triodos bank groenfons
*
generate dstock_invalid = 0
* A blank answer is detected after trimming, so an empty string or a run of spaces
* is treated the same as the single-space value " " (previously only " " matched).
replace dstock_invalid = . if trim(AMBIGUITY_STOCK_INTRODUCTION) == ""
label variable dstock_invalid "Invalid name for indivual stock (e.g, a mutual fund, or bitcoin)"
* Exact, case-sensitive matches against the screened list
foreach bad in "ASN milieu en waterfonds" ///
               "Ben niet bekend met aandelen" ///
               "Bitcoin" ///
               "Geen" ///
               "Meewind" ///
               "Rabo" ///
               "Triodos cultuurfonds" ///
               "bnp parisbas fortis obam" ///
               "certificaat Triodosbank" ///
               "geen" ///
               "geen enkel aandeel" ///
               "hollandsbezit" ///
               "triodos bank groenfons" {
    replace dstock_invalid = 1 if AMBIGUITY_STOCK_INTRODUCTION == "`bad'"
}


***********************************************************************
****** Create ambiguity attitude indexes: a-index and b-index 
***********************************************************************

* a-index and b-index for AEX
*   ms_aex : average matching probability over questions 1-3 (single events)
*   mc_aex : average matching probability over questions 4-6 (composite events)
*   a_aex  : 3 * (1/3 - (mc_aex - ms_aex))
*   b_aex  : 1 - mc_aex - ms_aex
* The arithmetic is written exactly as before; the later bounds checks on the a-index
* compare against 0 and 1, so the expressions must not be rearranged.
generate ms_aex = (m_aex1 + m_aex2 + m_aex3)/3
generate mc_aex = (m_aex4 + m_aex5 + m_aex6)/3
generate a_aex = 3 * (1/3 - (mc_aex - ms_aex))
generate b_aex = 1 - mc_aex - ms_aex
label variable ms_aex "Single-event matching probability AEX"
label variable mc_aex "Composite-event matching probability AEX"
label variable a_aex "a index AEX, perceived ambiguity"
label variable b_aex "b index AEX, ambiguity aversion"
* Summary stats: everyone, then without irrational responders, then also without
* same-row switchers
summarize a_aex b_aex
summarize a_aex b_aex if dir_aex == 0
summarize a_aex b_aex if dir_aex == 0 & dsamerow_aex == 0

* a-index and b-index for MSCI
*   ms_msci : average matching probability over questions 1-3 (single events)
*   mc_msci : average matching probability over questions 4-6 (composite events)
*   a_msci  : 3 * (1/3 - (mc_msci - ms_msci))
*   b_msci  : 1 - mc_msci - ms_msci
* The arithmetic is written exactly as before; the later bounds checks on the a-index
* compare against 0 and 1, so the expressions must not be rearranged.
generate ms_msci = (m_msci1 + m_msci2 + m_msci3)/3
generate mc_msci = (m_msci4 + m_msci5 + m_msci6)/3
generate a_msci = 3 * (1/3 - (mc_msci - ms_msci))
generate b_msci = 1 - mc_msci - ms_msci
label variable ms_msci "Single-event matching probability MSCI"
label variable mc_msci "Composite-event matching probability MSCI"
label variable a_msci "a index MSCI, perceived ambiguity"
label variable b_msci "b index MSCI, ambiguity aversion"
* Summary stats: everyone, then without irrational responders, then also without
* same-row switchers
summarize a_msci b_msci
summarize a_msci b_msci if dir_msci == 0
summarize a_msci b_msci if dir_msci == 0 & dsamerow_msci == 0

* a-index and b-index for STOCK
*   ms_stock : average matching probability over questions 1-3 (single events)
*   mc_stock : average matching probability over questions 4-6 (composite events)
*   a_stock  : 3 * (1/3 - (mc_stock - ms_stock))
*   b_stock  : 1 - mc_stock - ms_stock
* The arithmetic is written exactly as before; the later bounds checks on the a-index
* compare against 0 and 1, so the expressions must not be rearranged.
generate ms_stock = (m_stock1 + m_stock2 + m_stock3)/3
generate mc_stock = (m_stock4 + m_stock5 + m_stock6)/3
generate a_stock = 3 * (1/3 - (mc_stock - ms_stock))
generate b_stock = 1 - mc_stock - ms_stock
* Both indexes are blanked out when the respondent gave an invalid stock name for
* the ambiguity STOCK question (e.g., "bitcoin", or "none"), i.e. dstock_invalid == 1
foreach idx in a b {
    replace `idx'_stock = . if dstock_invalid == 1
}
label variable ms_stock "Single-event matching probability STOCK"
label variable mc_stock "Composite-event matching probability STOCK"
label variable a_stock "a index STOCK, perceived ambiguity"
label variable b_stock "b index STOCK, ambiguity aversion"
* Summary stats: everyone, then without irrational responders, then also without
* same-row switchers
summarize a_stock b_stock
summarize a_stock b_stock if dir_stock == 0
summarize a_stock b_stock if dir_stock == 0 & dsamerow_stock == 0

* a-index and b-index for BITCOIN
*   ms_bitcoin : average matching probability over questions 1-3 (single events)
*   mc_bitcoin : average matching probability over questions 4-6 (composite events)
*   a_bitcoin  : 3 * (1/3 - (mc_bitcoin - ms_bitcoin))
*   b_bitcoin  : 1 - mc_bitcoin - ms_bitcoin
* The arithmetic is written exactly as before; the later bounds checks on the a-index
* compare against 0 and 1, so the expressions must not be rearranged.
generate ms_bitcoin = (m_bitcoin1 + m_bitcoin2 + m_bitcoin3)/3
generate mc_bitcoin = (m_bitcoin4 + m_bitcoin5 + m_bitcoin6)/3
generate a_bitcoin = 3 * (1/3 - (mc_bitcoin - ms_bitcoin))
generate b_bitcoin = 1 - mc_bitcoin - ms_bitcoin
label variable ms_bitcoin "Single-event matching probability BITCOIN"
label variable mc_bitcoin "Composite-event matching probability BITCOIN"
label variable a_bitcoin "a index BITCOIN, perceived ambiguity"
label variable b_bitcoin "b index BITCOIN, ambiguity aversion"
* Summary stats: everyone, then without irrational responders, then also without
* same-row switchers
summarize a_bitcoin b_bitcoin
summarize a_bitcoin b_bitcoin if dir_bitcoin == 0
summarize a_bitcoin b_bitcoin if dir_bitcoin == 0 & dsamerow_bitcoin == 0


* Average indexes over the four sources (AEX, stock, MSCI, Bitcoin):
*   b_avg : mean of the four b-indexes
*   a_avg : mean of the four a-indexes
* Either average is missing as soon as one of its four inputs is missing.
* The order of the terms in each sum is kept as before.
generate b_avg = (b_aex + b_stock + b_msci + b_bitcoin)/4
label variable b_avg "avg b-index, ambiguity aversion (avg over AEX, MSCI, stock and Bitcoin)"
summarize b_avg

generate a_avg = (a_aex + a_stock + a_msci + a_bitcoin)/4
label variable a_avg "avg a-index, perceived ambiguity (avg over AEX, MSCI, stock and Bitcoin)"
summarize a_avg


* ap_*: copy of index a that is kept only when index a lies within the bounds for
*       perceived ambiguity (0 ≤ a ≤ 1); missing otherwise.
*       inrange() returns 0 for a missing a-index, so missings stay missing.
foreach src in aex stock msci bitcoin avg {
    generate ap_`src' = a_`src' if inrange(a_`src', 0, 1)
}
label variable ap_aex "AEX perceived ambiguity (0 ≤ a_aex ≤ 1)"
label variable ap_stock "Stock perceived ambiguity (0 ≤ a_stock ≤ 1)"
label variable ap_msci "MSCI perceived ambiguity (0 ≤ a_msci ≤ 1)"
label variable ap_bitcoin "Bitcoin perceived ambiguity (0 ≤ a_bitcoin ≤ 1)"
label variable ap_avg "Average perceived ambiguity (0 ≤ a_avg ≤ 1)"

* bp_*: copy of index b that is kept only when the matching index a lies within
*       the bounds for perceived ambiguity (0 ≤ a ≤ 1); missing otherwise.
foreach src in aex stock msci bitcoin avg {
    generate bp_`src' = b_`src' if inrange(a_`src', 0, 1)
}
* Label texts are reproduced verbatim from the earlier version of this file.
label variable bp_aex "b_aex with sample limited to 0 ≤ a_aex ≤ 1"
label variable bp_stock "b_stock with ample limited to 0 ≤ a_stock ≤ 1"
label variable bp_msci "b_msci with sample limited to 0 ≤ a_msci ≤ 1"
label variable bp_bitcoin "b_bitcoin with sample limited to 0 ≤ a_bitcoin ≤ 1"
label variable bp_avg "b_avg with sample limited to 0 ≤ a_avg ≤ 1"



***********************************************************************
****** Count the number of set monotonicity violations 
***********************************************************************

* Dummies for violations of set inclusion monotonicity, AEX ambiguity events.
* Each two-digit code "sc" names a pair of questions: v_aex_sc = 1 when the matching
* probability of question s is strictly larger than that of question c, 0 when it
* is not, and missing when either matching probability is missing.
* (presumably event s is contained in composite event c - confirm against the survey)
foreach pr in 15 16 24 26 34 35 {
    local s = substr("`pr'", 1, 1)
    local c = substr("`pr'", 2, 1)
    generate v_aex_`pr' = (m_aex`s' > m_aex`c') if m_aex`s' < . & m_aex`c' < .
}
* nv_aex: number of monotonicity violations for AEX mp's, ranging from 0 to 6
generate nv_aex = v_aex_15 + v_aex_16 + v_aex_24 + v_aex_26 + v_aex_34 + v_aex_35

* Dummies for violations of set inclusion monotonicity, familiar stock ambiguity events.
* Each two-digit code "sc" names a pair of questions: v_stock_sc = 1 when the matching
* probability of question s is strictly larger than that of question c, 0 when it
* is not, and missing when either matching probability is missing.
* (presumably event s is contained in composite event c - confirm against the survey)
foreach pr in 15 16 24 26 34 35 {
    local s = substr("`pr'", 1, 1)
    local c = substr("`pr'", 2, 1)
    generate v_stock_`pr' = (m_stock`s' > m_stock`c') if m_stock`s' < . & m_stock`c' < .
}
* nv_stock: number of monotonicity violations for fam stock mp's, ranging from 0 to 6
generate nv_stock = v_stock_15 + v_stock_16 + v_stock_24 + v_stock_26 + v_stock_34 + v_stock_35

* Dummies for violations of set inclusion monotonicity, MSCI ambiguity events.
* Each two-digit code "sc" names a pair of questions: v_msci_sc = 1 when the matching
* probability of question s is strictly larger than that of question c, 0 when it
* is not, and missing when either matching probability is missing.
* (presumably event s is contained in composite event c - confirm against the survey)
foreach pr in 15 16 24 26 34 35 {
    local s = substr("`pr'", 1, 1)
    local c = substr("`pr'", 2, 1)
    generate v_msci_`pr' = (m_msci`s' > m_msci`c') if m_msci`s' < . & m_msci`c' < .
}
* nv_msci: number of monotonicity violations for MSCI mp's, ranging from 0 to 6
generate nv_msci = v_msci_15 + v_msci_16 + v_msci_24 + v_msci_26 + v_msci_34 + v_msci_35

* Dummies for violations of set inclusion monotonicity, Bitcoin ambiguity events.
* Each two-digit code "sc" names a pair of questions: v_bitcoin_sc = 1 when the matching
* probability of question s is strictly larger than that of question c, 0 when it
* is not, and missing when either matching probability is missing.
* (presumably event s is contained in composite event c - confirm against the survey)
foreach pr in 15 16 24 26 34 35 {
    local s = substr("`pr'", 1, 1)
    local c = substr("`pr'", 2, 1)
    generate v_bitcoin_`pr' = (m_bitcoin`s' > m_bitcoin`c') if m_bitcoin`s' < . & m_bitcoin`c' < .
}
* nv_bitcoin: number of monotonicity violations for bitcoin mp's, ranging from 0 to 6
generate nv_bitcoin = v_bitcoin_15 + v_bitcoin_16 + v_bitcoin_24 + v_bitcoin_26 + v_bitcoin_34 + v_bitcoin_35

* nv_tot: total number of monotonicity violations over the 4 sources (6 pairs each,
* so the range is 0-24); missing when any of the four source counts is missing
generate nv_tot = 0
foreach src in aex stock msci bitcoin {
    replace nv_tot = nv_tot + nv_`src'
}
label variable nv_tot "Number of set inclusion monotonicity violations, out of 24 possible"
summarize nv_tot
tabulate nv_tot


*********************************************************************
****** Dummies for Weak Monotonicity violations 
***********************************************************************

* For the average a-index and for each source (AEX, stock, MSCI, Bitcoin) create:
*   dgt1_a_* : dummy for a weak monotonicity violation (a > 1)
*   dlt0_a_* : dummy for over-sensitivity (a < 0)
*   db01_a_* : dummy for an a-index within the bounds for perceived ambiguity
*              (0 ≤ a ≤ 1), computed as 1 - dgt1 - dlt0
* All three dummies are missing when the a-index itself is missing.
foreach src in avg aex stock msci bitcoin {
    generate dgt1_a_`src' = (a_`src' > 1) if a_`src' < .
    label variable dgt1_a_`src' "Dummy a_`src' > 1"
    generate dlt0_a_`src' = (a_`src' < 0) if a_`src' < .
    label variable dlt0_a_`src' "Dummy a_`src' < 0"
    generate db01_a_`src' = 1 - dgt1_a_`src' - dlt0_a_`src'
    label variable db01_a_`src' "Dummy 0 ≤ a_`src' ≤ 1"
}
* The stock label is restored to its exact earlier text (it differs in spacing)
label variable db01_a_stock "Dummy 0 ≤ a_stock ≤  1"



******************************************************************
****** Process the risk questions data (risk attitudes)
******************************************************************

* Indifference amounts for the 4 RISK choice lists.
*
* Each list has 18 rows (Risk<i>_1 ... Risk<i>_18). Whenever the answer code in row k+1
* is larger than the code in row k, the indifference amount is set to the k-th value of
* switch_amts. The replaces run from the first row to the last, so with several such
* switches the one furthest down the list determines the amount.
* Afterwards two boundary cases overwrite the result:
*   code 1 in row 18 -> 15, and code 2 in row 1 -> 0.
*
* NOTE(review): a missing value compares larger than any number, so an unanswered row
* directly below an answered row also satisfies the switch condition. This is harmless
* only if each choice list is either fully answered or fully missing - TODO confirm.
local switch_amts 0.5 1.5 2.5 3.5 4.25 4.75 5.25 5.75 6.25 6.75 7.25 7.75 8.5 9.5 10.5 11.75 13.75
forval i = 1/4 {
  g payoff_risk`i' = .
  local row = 1
  foreach amt of local switch_amts {
    local nxt = `row' + 1
    replace payoff_risk`i' = `amt' if Risk`i'_`nxt' > Risk`i'_`row'
    local row = `nxt'
  }
  replace payoff_risk`i' = 15 if Risk`i'_18 == 1
  replace payoff_risk`i' = 0 if Risk`i'_1 == 2
  label variable payoff_risk`i' "Indifference amount Risk`i'"
}

summ payoff_risk1 payoff_risk2 payoff_risk3 payoff_risk4 


* Mistakes (irrational answers) on the risk questions.
* ir_risk<i> = 1 when the first row of list i has code 2 or the last row (18) has code 1;
*            = . when row 1 or row 18 is missing and neither of those codes is present;
*            = 0 otherwise.
forval i = 1/4 {
  gen ir_risk`i' = 0
  replace ir_risk`i' = . if Risk`i'_1 == . | Risk`i'_18 == .
  replace ir_risk`i' = 1 if Risk`i'_1 == 2 | Risk`i'_18 == 1

  summ ir_risk`i'
  tab ir_risk`i' Risk`i'_1
  tab ir_risk`i' Risk`i'_18

  * Labelled only after the tabulations above have been printed
  label variable ir_risk`i' "Dummy irrational responses Risk`i'"
}

summ ir_risk1 ir_risk2 ir_risk3 ir_risk4 

* nir_risk: number of irrational responses over the 4 lists (missing if any flag is missing)
gen nir_risk = ir_risk1 + ir_risk2 + ir_risk3 + ir_risk4
label variable nir_risk "Number of irrational responses to 4 risk questions"
summ nir_risk
tab nir_risk

* dir_risk: 1 when at least one of the 4 responses is irrational, 0 when none is
gen dir_risk = (nir_risk > 0) if nir_risk < .
label variable dir_risk "Dummy for 1 or more irrational responses to 4 risk questions"
summ dir_risk
tab dir_risk nir_risk


* Risk premium variables: (ev - indifference amount) / ev,
* where ev is the reference amount used for each list (7.5, 5.0, 2.5 and 12.5)
* and pw is the percentage shown in the variable label.
local ev1 7.5
local ev2 5.0
local ev3 2.5
local ev4 12.5
local pw1 50
local pw2 33
local pw3 17
local pw4 83
forval i = 1/4 {
    gen prem_risk`i' = (`ev`i'' - payoff_risk`i')/`ev`i''
}
forval i = 1/4 {
    label variable prem_risk`i' "Risk premium % Risk`i' (`pw`i''%)"
}

summ prem_risk1 prem_risk2 prem_risk3 prem_risk4

* Association between the four premiums: correlations, Cronbach's alpha, principal components
corr prem_risk1 prem_risk2 prem_risk3 prem_risk4
alpha prem_risk1 prem_risk2 prem_risk3 prem_risk4, item
pca prem_risk1 prem_risk2 prem_risk3 prem_risk4, factors(2)

* Risk aversion measure: mean of the 4 risk premiums (missing if any premium is missing)
gen avg_riskprem = (prem_risk1 + prem_risk2 + prem_risk3 + prem_risk4)/4
label variable avg_riskprem "Risk aversion, average % premium of 4 risk questions"
summ avg_riskprem

* Alternative risk aversion proxy: mean of the premiums for Risk1 (50%) and Risk2 (33%)
gen avg_riskprem12 = (prem_risk1 + prem_risk2)/2
label variable avg_riskprem12 "Risk aversion alt., average % premium of 50% and 33% risk questions"
summ avg_riskprem12

* Inverse-S probability weighting measure: premium of Risk4 (83%) minus premium of Risk3 (17%)
gen InvS = prem_risk4 - prem_risk3
label variable InvS "Inverse-S, diff of % premium for 83% and 17% risk questions"
summ InvS
summ InvS, detail

* Dummy for positive average risk premium (1 if > 0, 0 if <= 0, missing if missing)
gen dpos_avg_riskprem = (avg_riskprem > 0) if avg_riskprem < .
label variable dpos_avg_riskprem "Dummy for avg_riskprem positive (> 0)"
summ dpos_avg_riskprem

* Dummy for positive InvS (1 if > 0, 0 if <= 0, missing if missing)
gen dpos_InvS = (InvS > 0) if InvS < .
label variable dpos_InvS "Dummy for InvS positive (> 0)"
summ dpos_InvS


******************************************************************
****** Estimate index b and a for RISK (risk attitude measures)
******************************************************************

* Per-respondent OLS of the rescaled indifference amounts on the winning probabilities.
*
* Output variables (one value per respondent, stored in the respondent's own row):
*   c_estrisk   : estimated intercept c
*   s_estrisk   : estimated slope s
*   r2_estrisk  : R-squared of the regression
*   r2a_estrisk : adjusted R-squared of the regression
*
* Scratch variables (only rows 1-4 of the dataset are used as a 4-observation workspace):
*   yv    : indifference amount / 15 of the current respondent, one row per risk choice list
*   xp    : probability assigned to risk choice list 1-4 (0.50, 0.33, 0.17, 0.83)
*   errsq : created and reset below, but not otherwise used within this section
gen c_estrisk   = . 
gen s_estrisk   = . 
gen r2_estrisk  = . 
gen r2a_estrisk = . 
gen yv = .
gen xp = .
gen errsq = .
replace xp = 0.50 in 1
replace xp = 0.33 in 2
replace xp = 0.17 in 3
replace xp = 0.83 in 4
	 
*** Reset the coefficients
*** (redundant directly after the gen statements above; keeps the section safe to re-run
*** once the variables already exist)
replace c_estrisk  = . 
replace s_estrisk  = . 
replace r2_estrisk = . 
replace r2a_estrisk = . 
	 
* Loop over all respondents (one regression per row of the dataset)
*	 
local Nobs = _N
forval r = 1/`Nobs'{
      
  * Sum of the 4 indifference amounts of respondent r.
  * The sum is missing if the respondent did not complete all 4 risk choice lists,
  * so it serves purely as a completeness check.
  *
  local sum = payoff_risk1 + payoff_risk2 + payoff_risk3 + payoff_risk4 in `r'
  	 
  * Only do the estimation for respondents with complete data for the 4 risk choice lists
  *
  if (`sum' < .) { 
  
    * Reset the scratch variables so that no values of the previous respondent remain
	*
	qui replace yv    = .
    qui replace errsq = .
	    
	* Loop over the 4 risk choice lists to prepare the data for estimating the linear prob. weighting function.
	* This copies the 4 indifference amounts of respondent r into rows 1-4 of yv.
	*
	forval j = 1/4 {
	
	    * Look up the indifference amount of respondent r on risk choice list j
	    * and save it in row j of yv, after dividing by 15
	    local sr   = payoff_risk`j' in `r'  
	    qui replace yv = `sr'/15  in `j'
	  	  	  	    	   	  	   
	}
	* end of loop
	
	* By now rows j = 1 to 4 of the data file contain the following information for respondent r
	*
	* yv = rescaled indifference amount y_j for risk choice list j (= 1 - 4), divided by 15
	* xp = probability of winning for risk choice list j (= 1 - 4)
	*
	* Model: yv = CE / 15 = c + s*p
	* 
		
    * Estimation 1
	*
	* OLS to estimate coefficients c and s of the neo-additive weighting function.
	* Only rows 1-4 have non-missing yv and xp, so the regression uses 4 observations.
	* The regression output is printed for every respondent (reg is not run quietly).
	*
	display "Estimation 1 for respondent =" `r' 
	reg yv xp
	
	* Store the coefficient estimates and r2 in the row of respondent r
	qui replace c_estrisk   = _b[_cons] in `r'
	qui replace s_estrisk   = _b[xp] in `r'
	qui replace r2_estrisk  =  e(r2) in `r'
	qui replace r2a_estrisk =  e(r2_a) in `r'
									  
  }
  * end of if statement for having data

}
* end of loop over respondents
	   		

* Index b and index a for risk, computed from the estimated intercept (c_estrisk)
* and slope (s_estrisk):
*   b_risk = 1 - s - 2c
*   a_risk = 1 - s
gen b_risk = 1 - s_estrisk - 2*c_estrisk
gen a_risk = 1 - s_estrisk
label variable b_risk "Index b for risk"
label variable a_risk "Index a for risk"
summ b_risk a_risk

* Correlations of the risk indexes with the premium-based measures:
* a_risk with InvS, and b_risk with avg_riskprem12
corr a_risk InvS 
corr b_risk avg_riskprem12 

* dpos_b_risk: 1 if b_risk > 0, 0 if b_risk <= 0, missing if b_risk is missing
gen dpos_b_risk = (b_risk > 0) if b_risk < .
label variable dpos_b_risk "Dummy for risk aversion (b_risk > 0)"
summ dpos_b_risk

* dpos_a_risk: 1 if a_risk > 0, 0 if a_risk <= 0, missing if a_risk is missing
gen dpos_a_risk = (a_risk > 0) if a_risk < .
label variable dpos_a_risk "Dummy for Inverse-S (a_risk > 0)"
summ dpos_a_risk




******************************************************************
****** Familiarity with Ambiguity Sources  
******************************************************************

* 0/1 dummies for the MSCI World index
*
* 	dknow_msci : familiar with MSCI World
*                1 when AMBIGUITY_MSCI_BEGIN_1 equals 1, 0 for larger non-missing codes
gen dknow_msci = 0 if AMBIGUITY_MSCI_BEGIN_1 > 1 & AMBIGUITY_MSCI_BEGIN_1 < .
replace dknow_msci = 1 if AMBIGUITY_MSCI_BEGIN_1 == 1
label variable dknow_msci "Familiar with MSCI World"

* The three investment dummies start at 0 for everybody who answered
* AMBIGUITY_MSCI_BEGIN_1 and are switched on by the code in AMBIGUITY_MSCI_BEGIN_2.

*   dmsci_ever : invested in MSCI World, currently or in the past (codes 2 and 3)
gen dmsci_ever = 0 if AMBIGUITY_MSCI_BEGIN_1 < .
replace dmsci_ever = 1 if inlist(AMBIGUITY_MSCI_BEGIN_2, 2, 3)
label variable dmsci_ever "Invest(ed) in MSCI World, now or in the past"

*   dmsci_now  : currently investing in MSCI World (code 3)
gen dmsci_now = 0 if AMBIGUITY_MSCI_BEGIN_1 < .
replace dmsci_now = 1 if AMBIGUITY_MSCI_BEGIN_2 == 3
label variable dmsci_now "Currently invests in MSCI World"

*   dmsci_past : invested in MSCI World in the past (code 2)
gen dmsci_past = 0 if AMBIGUITY_MSCI_BEGIN_1 < .
replace dmsci_past = 1 if AMBIGUITY_MSCI_BEGIN_2 == 2
label variable dmsci_past "In the past invested in MSCI World"

summ dknow_msci dmsci_ever dmsci_now dmsci_past


* 0/1 dummies for the familiar individual stock
*
* 	dknow_stock : 1 = named a familiar listed company stock (a valid one),
*                 0 = answered "Ik weet het niet" (don't know) or "Ik wil het niet zeggen"
*                     (refuse); "Philips" was then used for the questions
*
gen dknow_stock = !inlist(AMBIGUITY_STOCK_INTRODUCTION, "Ik weet het niet", "Ik wil het niet zeggen")
label variable dknow_stock "Knows a familiar indivual stock"

* Respondents who entered an invalid stock name (e.g., Bitcoin, "none")
* are counted as not knowing a stock
replace dknow_stock = 0 if dstock_invalid == 1

* Note: the 16 respondents with an empty stock name (" ") did not complete this part
* of the survey, so the dummy is set to missing for them
replace dknow_stock = . if AMBIGUITY_STOCK_INTRODUCTION == " " 

summ dknow_stock
tab AMBIGUITY_STOCK_INTRODUCTION 
tab AMBIGUITY_STOCK_INTRODUCTION if dknow_stock == 0
tab AMBIGUITY_STOCK_INTRODUCTION if dknow_stock == 1

* The three investment dummies below are set to missing for invalid stock names.

*   dstock_ever : invested in the individual stock, currently or in the past
gen dstock_ever = 0 if AMBIGUITY_STOCK_BEGIN_1 < .
replace dstock_ever = 1 if AMBIGUITY_STOCK_BEGIN_1 == 1 | AMBIGUITY_STOCK_BEGIN_2 == 1
replace dstock_ever = . if dstock_invalid == 1 
label variable dstock_ever "Invest(ed) in familiar stock, now or in the past"

*   dstock_now  : currently investing in the individual stock
gen dstock_now = 0 if AMBIGUITY_STOCK_BEGIN_1 < . 
replace dstock_now = 1 if AMBIGUITY_STOCK_BEGIN_1 == 1 
replace dstock_now = . if dstock_invalid == 1 
label variable dstock_now "Currently invests in the familiar stock"

*   dstock_past : invested in the individual stock in the past
*                 (0 also for current investors, i.e. AMBIGUITY_STOCK_BEGIN_1 == 1)
gen dstock_past = 0 if AMBIGUITY_STOCK_BEGIN_2 < . | AMBIGUITY_STOCK_BEGIN_1 == 1
replace dstock_past = 1 if AMBIGUITY_STOCK_BEGIN_2 == 1
replace dstock_past = . if dstock_invalid == 1 
label variable dstock_past "In the past invested in the familiar stock"

summ dknow_stock dstock_ever dstock_now dstock_past


* 0/1 dummies for Bitcoin
*
* 	dknow_bitcoin : familiar with Bitcoin
*                   1 when AMBIGUITY_BITCOIN_BEGIN_1 equals 1, 0 for larger non-missing codes
gen dknow_bitcoin = 0 if AMBIGUITY_BITCOIN_BEGIN_1 > 1 & AMBIGUITY_BITCOIN_BEGIN_1 < .
replace dknow_bitcoin = 1 if AMBIGUITY_BITCOIN_BEGIN_1 == 1
label variable dknow_bitcoin "Knows bitcoin"

*   dcrypto_ever  : invested in Bitcoin or other crypto-currencies, currently or in the past
*                   (codes 2 and 3); forced to 0 for code 1 and for respondents who do not know Bitcoin
gen dcrypto_ever = 1 if inlist(AMBIGUITY_BITCOIN_BEGIN_2, 2, 3)
replace dcrypto_ever = 0 if AMBIGUITY_BITCOIN_BEGIN_2 == 1 | dknow_bitcoin == 0
label variable dcrypto_ever "Invest(ed) in crypto currency, now or in the past"

*   dcrypto_now   : currently investing in Bitcoin or other crypto-currencies (code 3);
*                   forced to 0 for codes up to 2 and for respondents who do not know Bitcoin
gen dcrypto_now = 1 if AMBIGUITY_BITCOIN_BEGIN_2 == 3
replace dcrypto_now = 0 if AMBIGUITY_BITCOIN_BEGIN_2 <= 2 | dknow_bitcoin == 0
label variable dcrypto_now "Currently invests in crypto currency"

*   dcrypto_past  : invested in Bitcoin or other crypto-currencies in the past (code 2)
gen dcrypto_past = 0 if AMBIGUITY_BITCOIN_BEGIN_2 < . | dknow_bitcoin == 0
replace dcrypto_past = 1 if AMBIGUITY_BITCOIN_BEGIN_2 == 2
label variable dcrypto_past "In the past invested in crypto currency"

summ dknow_bitcoin dcrypto_ever dcrypto_now dcrypto_past


* know_score: number of investments the respondent knows,
* the sum of the stock, MSCI and bitcoin familiarity dummies (0 to 3).
* Missing when any of the three dummies is missing.
*
gen know_score = dknow_stock + dknow_msci + dknow_bitcoin
label variable know_score "Number of familiar investments (out of 3: stock, msci, bitcoin)"
tab know_score 


******************************************************************
****** Investment access and experience variables
******************************************************************

* Generate 0/1 dummies for investing ("Do you invest?"),
* copied from the answer indicators S01_1_ ... S01_4_
*
* 	dinv_indep    : Invests independently, without access to a financial advisor
*
gen dinv_indep = S01_1_ 
label variable dinv_indep "Invests independently (without advisor)"

* 	dinv_advisor  : Invests with access to a financial advisor
gen dinv_advisor = S01_2_ 
label variable dinv_advisor "Invests with access to financial advisor"

* 	dinv_managed  : Investments are managed by a bank/broker. Does not trade himself.
gen dinv_managed = S01_3_
label variable dinv_managed "Broker or bank manages the investments (does not trade himself)"

*   dinvest       : Invests, either independently, with advice, or managed by broker.
*                   Defined as the complement of S01_4_.
gen dinvest = 1 - S01_4_
label variable dinvest "Invests either independently, with advise, or managed by broker"

*   dinv_notman   : Invests with or without advisor (2 groups combined), but not managed.
*                   The full variable name S01_1_ is used here (the original "S01_1" only
*                   resolved through Stata's variable-name abbreviation).
gen dinv_notman = 0 if S01_1_ < .
replace dinv_notman = 1 if S01_1_ == 1 | S01_2_ == 1 
label variable dinv_notman "Invests with or without advisor (2 groups combined), but not managed"

summ dinvest dinv_indep dinv_advisor dinv_managed dinv_notman
tab S01_1_ 
tab S01_2_ 
tab S01_3_ 
tab S01_4_ 

* yearsinvest : years of investment experience, with 0 for non-investors
*
* Step 1: Q01B system-missing, -9 or -8  -> 0 years
* Step 2: non-negative Q01B              -> the reported number of years
* Step 3: the categorical answer Q01Ba overrides the result: 1 -> 0, 3 -> 3, 4 -> 20
*
* NOTE(review): Q01Ba == 2 is not mapped, so such respondents keep the value from
* steps 1-2 - confirm against the questionnaire that this is intended.
gen yearsinvest = 0 if Q01B == . | inlist(Q01B, -9, -8)
replace yearsinvest = Q01B if Q01B >= 0 & Q01B < .
replace yearsinvest = 0 if Q01Ba == 1
replace yearsinvest = 3 if Q01Ba == 3
replace yearsinvest = 20 if Q01Ba == 4  /* Median for experience > 5 group is 20 years */
label variable yearsinvest "Investment experience in years, with 0 for non-investors"

* Checks: overall, by investor status, and for the missing / negative Q01B codes
summ yearsinvest
summ yearsinvest if dinvest == 1
summ yearsinvest if dinvest == 0
summ yearsinvest if  Q01B == .
tab yearsinvest if  Q01B == -8 | Q01B == -9


******************************************************************
****** Financial literacy score 
******************************************************************

* Financial Numeracy score: number of correct answers out of 3.
* Each item dummy is 1 for the correct answer code (1, 1 and 2 respectively),
* 0 for any other non-missing code, and missing when the question is unanswered.
*
g NUM1_Correct = (FINANCIAL_NUMERACY_1 == 1) if FINANCIAL_NUMERACY_1 < .
label variable NUM1_Correct "Correctly answer the first basic Financial Numeracy question"
g NUM2_Correct = (FINANCIAL_NUMERACY_2 == 1) if FINANCIAL_NUMERACY_2 < .
label variable NUM2_Correct "Correctly answer the second Financial Numeracy question"
g NUM3_Correct = (FINANCIAL_NUMERACY_3 == 2) if FINANCIAL_NUMERACY_3 < .
label variable NUM3_Correct "Correctly answer the third Financial Numeracy question"
g NUM = NUM1_Correct + NUM2_Correct + NUM3_Correct
label variable NUM "Financial Numeracy score (0/3)"
tab NUM

* Number of "Dont Know" answers: answer code 4 on each of the three questions
*
g NUM1_DK = (FINANCIAL_NUMERACY_1 == 4) if FINANCIAL_NUMERACY_1 < .
label variable NUM1_DK "Do not know answer to the first basic Financial Numeracy question"
g NUM2_DK = (FINANCIAL_NUMERACY_2 == 4) if FINANCIAL_NUMERACY_2 < .
label variable NUM2_DK "Do not know answer to the second Financial Numeracy question"
g NUM3_DK = (FINANCIAL_NUMERACY_3 == 4) if FINANCIAL_NUMERACY_3 < .
label variable NUM3_DK "Do not know answer to  the third Financial Numeracy question"
g NUM_DK = NUM1_DK + NUM2_DK + NUM3_DK
label variable NUM_DK "Number of dont know answers to the Financial Numeracy questions (0/3)"
tab NUM_DK

* Financial Literacy score: number of correct answers on questions Q19 - Q27.
*
* fl_keys holds the correct answer code of Q19, Q20, ..., Q27 (in that order) and
* fl_ords the ordinal word used in the variable label.
* FL<k>_Correct is 1 for the correct code, 0 for another non-missing code,
* and missing when the question is unanswered.
*
local fl_keys 1 3 2 3 1 2 3 2 1
local fl_ords first second third forth fifth sixth seventh eighth ninth
forval k = 1/9 {
    * Question k of the literacy block is stored in variable Q(18 + k)
    local q = 18 + `k'
    local key : word `k' of `fl_keys'
    local ord : word `k' of `fl_ords'
    g FL`k'_Correct = (Q`q' == `key') if Q`q' < .
    label variable FL`k'_Correct "Correctly answer the `ord' Financial Literacy question"
}

* Total score; missing when any of the nine items is missing
g FL = FL1_Correct + FL2_Correct + FL3_Correct + FL4_Correct + FL5_Correct + FL6_Correct + FL7_Correct + FL8_Correct + FL9_Correct
label variable FL "Financial Literacy score (0/9)"

summ FL FL1_Correct FL2_Correct FL3_Correct FL4_Correct FL5_Correct FL6_Correct FL7_Correct FL8_Correct FL9_Correct

* Number of "Dont Know" answers on the Financial Literacy questions Q19 - Q27.
*
* dk_codes holds the answer code that is counted as "don't know" for Q19, Q20, ..., Q27
* (in that order); dk_ords is the ordinal word used in the variable label.
* FL<k>_DK is 1 for that code, 0 for another non-missing code, missing when unanswered.
*
local dk_codes 4 4 3 5 5 5 4 4 3
local dk_ords first second third forth fifth sixth seventh eighth ninth
forval k = 1/9 {
    * Question k of the literacy block is stored in variable Q(18 + k)
    local q = 18 + `k'
    local code : word `k' of `dk_codes'
    local ord : word `k' of `dk_ords'
    g FL`k'_DK = (Q`q' == `code') if Q`q' < .
    label variable FL`k'_DK "Dont know answer to the `ord' Financial Literacy question"
}

* Total number of "don't know" answers; missing when any of the nine items is missing
g FL_DK = FL1_DK + FL2_DK + FL3_DK + FL4_DK + FL5_DK + FL6_DK + FL7_DK + FL8_DK + FL9_DK
label variable FL_DK "Number of dont know answers to Financial Literacy questions (0/9)"

summ FL_DK FL1_DK FL2_DK FL3_DK FL4_DK FL5_DK FL6_DK FL7_DK FL8_DK FL9_DK


* Basic financial literacy score, using 5 questions as in Van Rooij et al. (2012):
* the three numeracy items (already summed in NUM) plus the first two literacy items.
*
g FL_Basic = NUM + FL1_Correct + FL2_Correct
label variable FL_Basic "Basic Financial Literacy score (0/5)"
summ FL_Basic

* Advanced financial literacy score, using 7 questions from Van Rooij et al. (2012):
* literacy items 3 to 9.
*
g FL_Advanced = FL3_Correct + FL4_Correct + FL5_Correct + FL6_Correct + FL7_Correct + FL8_Correct + FL9_Correct
label variable FL_Advanced "Advanced Financial Literacy score (0/7)"
summ FL_Advanced


* Total financial literacy score over all 12 questions (basic + advanced).
* Missing when any of the 12 items is missing.
*
gen FL_Total = FL_Basic + FL_Advanced
label variable FL_Total "Financial Literacy Total Score (0-12)"
summ FL_Total


******************************************************************
****** SOCIO-DEMOGRAPHIC VARS 
******************************************************************

* Gender dummies
* male_dummy : 1 when geslacht == 1, 0 when geslacht == 2, missing otherwise
* dfemale    : geslacht - 1
*
tab geslacht
gen male_dummy = 0 if geslacht == 2
replace male_dummy = 1 if geslacht == 1
label variable male_dummy "Male dummy (0/1)"
tab geslacht male_dummy
summ male_dummy
gen dfemale = geslacht - 1
label variable dfemale "Female dummy"
summ dfemale

* Age (in years), a copy of leeftijd
* name: age
*
gen age = leeftijd
label variable age "Age (in years)"
summ age leeftijd

* Single dummy
* single_dummy : 1 when partner == 0, 0 when partner == 1, missing otherwise
*
tab partner
gen single_dummy = 0 if partner == 1
replace single_dummy = 1 if partner == 0
label variable single_dummy "Single dummy (0/1)"
tab single_dummy partner
summ single_dummy

* Number of children
* children    : copy of aantalki
* logchildren : ln(1 + aantalki), only for non-negative, non-missing aantalki
*
gen children = aantalki
gen logchildren = ln(1 + aantalki) if !missing(aantalki) & aantalki >= 0
label variable children "Number of kids" 
label variable logchildren "Number of kids (log)"
summ children logchildren 

* Log of net household income
* lognettohh : ln(1 + nettohh), only for non-negative, non-missing nettohh
*
gen lognettohh = ln(1 + nettohh) if !missing(nettohh) & nettohh >= 0
summ lognettohh

* Education dummies, derived from the education category oplcat
*   edu_low        : oplcat <= 2
*   edu_mid        : oplcat 3 or 4
*   edu_hibachelor : oplcat == 5
*   edu_himaster   : oplcat == 6
*   edu_high       : edu_hibachelor + edu_himaster
* All dummies are missing when oplcat is missing.
*
tab oplcat
gen edu_low = 0 if oplcat > 2 & oplcat < .
replace edu_low = 1 if oplcat <= 2
gen edu_mid = inrange(oplcat, 3, 4) if oplcat < .
gen edu_hibachelor = (oplcat == 5) if oplcat < .
gen edu_himaster = (oplcat == 6) if oplcat < .
label variable edu_low "Low education dummy (0/1)"
label variable edu_mid "High school education dummy (0/1)"
label variable edu_hibachelor "Bachelor education dummy (0/1)"
label variable edu_himaster "Master education dummy (0/1)"
gen edu_high = edu_hibachelor + edu_himaster 
label variable edu_high "High education dummy, bachelor or master (0/1)"

* Check variable: the sum of the four category dummies
cap drop temp
gen temp = edu_low + edu_mid + edu_hibachelor + edu_himaster
summ edu_low edu_mid edu_hibachelor edu_himaster temp
summ edu_hibachelor edu_himaster edu_high 

* Employment dummies, derived from the primary occupation code belbezig
* Name: occ_regular (regular employment; belbezig 1 or 2),
*       occ_buss    (business owner, or self-employed; belbezig 3) 
*       occ_retired (retired; belbezig 8)
*       occ_unemp   (unemployed, disabled, student, houseman/wife, and other;
*                    belbezig 4 or higher, except 8)
*       occ_empl    (employed: occ_regular or occ_buss)
* All dummies are missing when belbezig is missing.
*
tab belbezig
gen occ_regular = 0 if belbezig < .
replace occ_regular = 1 if belbezig == 1 | belbezig == 2
gen occ_buss  = 0 if belbezig < .
replace occ_buss = 1 if belbezig == 3
gen occ_retired = 0 if belbezig < .
replace occ_retired = 1 if belbezig == 8
gen occ_unemp = 1 if belbezig >= 4 & belbezig ~= 8 & belbezig < .
* (this replace was previously issued twice; the second copy changed nothing)
replace occ_unemp = 0 if belbezig <= 3 | belbezig == 8
gen occ_empl = 0 if belbezig < .
replace occ_empl = 1 if occ_regular == 1 | occ_buss == 1
label variable occ_regular "Regular employment dummy (0/1)"
label variable occ_buss "Business owner dummy (0/1)"
label variable occ_retired "Retired dummy (0/1)"
label variable occ_unemp "Unemployed/disabled/other dummy (0/1)"
label variable occ_empl "Employed dummy (0/1), regular, self-employed, or business owner"

* Check variable: the sum of the four mutually exclusive occupation dummies
cap drop temp
gen temp = occ_regular + occ_buss + occ_unemp + occ_retired
summ occ_regular occ_buss occ_retired occ_unemp temp

* Create a unique household member identifier number, 
* to use as a key when merging in other DHS datasets
*
* The key is stored as double: Stata's default float type represents integers exactly
* only up to 16,777,216, so a float key built as 100*nohhold + nomem would be rounded
* (and could collide) for household numbers above roughly 167,000.
* NOTE(review): any other file that builds noid for merging should use double as well.
gen double noid = 100*nohhold + nomem
order noid, after(nomem)
label variable noid "DHS member identifier"
sort noid
drop if noid == .

* Save the dataset
save "$fn_main", replace



************************************************************************
******* Merge in the CentERdata panel member selection datafile 
************************************************************************

** Open the CentERdata selection datafile with information about which panel members
** were sent the survey. It contains a variable "beleg" (invest) that indicates whether 
** the panel member had any investments in the 2017 or 2016 DNB asset surveys.
** 
** At the time of the survey, the 2017 DHS assets survey was not complete yet,
** and therefore the dummy for having investments (beleg) is slightly different
** from what is recorded now in the DHS 2017 and 2016 asset surveys (only for 6 people)
** 
use "Selectie_RiskAmbiguityp.dta", clear

* Create a sum of the asset ownership dummies in the file
* variable: bzsum (missing when any of the seven dummies is missing)
* bz12: dummy for having mutual fund investments
* bz13: dummy for having bonds 
* bz14: dummy for having stocks
* bz15: dummy for having long put options 
* bz16: dummy for having long call options 
* bz17: dummy for having short put options 
* bz18: dummy for having short call options 
gen bzsum = bz12 + bz13 + bz14 + bz15 + bz16 + bz17 + bz18
* Label corrected from "2026" to 2016 and kept within Stata's 80-character label limit
label variable bzsum "Number of investment types owned in 2017 or 2016 DHS when module was fielded"
tab bzsum 

* Update the variable "beleg" that keeps track who owned some investments
* to become a dummy: 1 for those having some investments, 0 for others
* 
replace beleg = 0 if bzsum == 0
* Label kept within Stata's 80-character label limit (the longer text was truncated)
label variable beleg "Had investments in 2017 or 2016 DHS asset survey when module was fielded (0/1)"
tab beleg

* Create a unique household member identifier number, 
* to use as a key when merging with other DHS datasets
* (double for the same precision reason as in the main dataset, so both keys match exactly)
*
gen double noid = 100*nohhold + nomem
label variable noid "DHS member identifier"
sort noid
order noid, after(nomem)

* Save the processed data 
*
save "Selectie_RiskAmbiguity_Processed.dta", replace 

* Open the main dataset
use "$fn_main", clear

* Merge in the "beleg" indicator for panel members owning some investments 
* in the 2017 or 2016 DHS surveys, measured at the time when the Risk & Ambiguity 
* module was fielded by CentERdata.
* Panel members that only appear in the selection file (_merge == 2) are dropped.
* 
merge 1:1 noid using "Selectie_RiskAmbiguity_Processed.dta"
drop if _merge==2
drop _merge


************************************************************************
**** Create confidence intervals for index b and a in the investor sample 
************************************************************************

* Generate dummy variables that classify each index as significantly positive,
* significantly negative, or zero (insignificant).
*
* First create a confidence interval around 0 for every index b and a,
* in the sample of investors with full data on ambiguity and risk.
*
* Index b: classified as neutral when b_index is within the 95% confidence
* interval around 0. Positive (averse) or negative (seeking) is counted only
* when b_index is outside the 95% confidence interval around 0.
*
* Index a (perceived ambiguity): classified as zero when a_index is within the
* 95% confidence interval around 0. Positive (perceived ambiguity) or negative
* (cannot be interpreted as perceived ambiguity; over-a-sensitivity) is counted
* only when a_index is outside the 95% confidence interval around 0.
*
* For each index X the loop below creates, in this order:
*   X_bound : half-width of the interval, t(N-1; 0.025) * sd / sqrt(N),
*             with N and sd taken from the screened sample (constant over rows)
*   dspos_X : 1 if X >  X_bound, 0 otherwise
*   dsneg_X : 1 if X < -X_bound, 0 otherwise
*   dzero_X : 1 if -X_bound <= X <= X_bound, 0 otherwise
* The three dummies are missing whenever X itself is missing. They are defined
* for all respondents with a non-missing X, not only for the screened sample;
* the screen is used only to estimate the bound and for the summary tables.
*

* Set the sample screen: investors with complete ambiguity and risk data
local screen_temp "beleg == 1 & b_aex < . & b_stock < . & b_msci < . & b_bitcoin < . & a_aex < . & a_stock < . & a_msci < . & a_bitcoin < . & avg_riskprem < ."

* Indexes per source (aex, stock, msci, bitcoin) plus the average over the
* 4 sources (avg); first all b-indexes, then all a-indexes.
foreach idx in b_aex b_stock b_msci b_bitcoin b_avg a_aex a_stock a_msci a_bitcoin a_avg {

	* Half-width of the 95% confidence interval around 0 in the screened sample
	summ `idx' if `screen_temp', detail
	display invttail(r(N)-1,0.025)
	gen `idx'_bound = invttail(r(N)-1,0.025)*r(sd)/sqrt(r(N))
	summ `idx'_bound

	* Guard: with fewer than two screened observations the bound is missing,
	* and "X < -." would then flag every respondent as significantly negative.
	assert `idx'_bound < .

	* Dummies are 0 by default, and missing when the index is missing
	gen dspos_`idx' = 0 if `idx' < .
	gen dsneg_`idx' = 0 if `idx' < .
	gen dzero_`idx' = 0 if `idx' < .

	* "& X < ." is needed for the positive dummy only: missing values compare
	* as larger than any number, so they can never satisfy the other two tests
	replace dspos_`idx' = 1 if `idx' > `idx'_bound & `idx' < .
	replace dsneg_`idx' = 1 if `idx' < -`idx'_bound
	replace dzero_`idx' = 1 if `idx' >= -`idx'_bound & `idx' <= `idx'_bound

	* Label text is kept verbatim (including its spelling), so that the saved
	* dataset is identical to the one produced by earlier versions of this file
	label variable dspos_`idx' "Dummmy `idx' positive (significant)"
	label variable dsneg_`idx' "Dummmy `idx' negative (significant)"
	label variable dzero_`idx' "Dummmy `idx' zero (insignificant)"

	* Shares of positive / zero / negative in the screened investor sample
	summ dspos_`idx' dzero_`idx' dsneg_`idx' if `screen_temp'
}



************************************************************************
**** Merge in the 2018 DHS assets data, amounts as of 31 Dec 2017
************************************************************************
*
* Adds the final 2018 DHS asset data from CentERdata to the main dataset,
* matching on noid. This data reports asset ownership as of 31 Dec 2017. 
* Records found only in the wealth file (_merge == 2) are not kept.
*
merge 1:1 noid using "wealth2018_summary.dta"
keep if inlist(_merge, 1, 3)
drop _merge


************************************************************************
**** Create financial assets variables, imputing missing values                           
************************************************************************

* Log of (1 + financial assets), for the respondent (finass)
* and for the household (finasshh)
*
summ finass finasshh, detail
gen logfinass   = ln(1 + finass)
gen logfinasshh = ln(1 + finasshh)
label variable logfinass   "Log Financial Assets DHS 2018"
label variable logfinasshh "Log HH Financial Assets DHS 2018"


* Missing-data dummies for the two log variables: 1 if the log is missing,
* 0 otherwise. The log is also missing when 1 + assets is not positive,
* so such observations are flagged as well.
*
foreach var in logfinass logfinasshh { 
	gen `var'_md = missing(`var')
	label var `var'_md "`var' missing data dummy"
}

* Check the dummies against the variables they describe
foreach var in logfinass logfinasshh { 
	summ `var' `var'_md
	summ `var' if `var'_md == 1
	summ `var' if `var'_md == 0
}


* Impute missing log-asset amounts in two steps. The result is stored in
* <var>_ms; the original variable is left unchanged.
*
foreach var in logfinass logfinasshh {
	* Start from the reported value
	gen `var'_ms = `var'
	label var `var'_ms "`var' imputed with (group) median"

	tempvar cellmed allmed

	* Step 1: median of var among respondents in the same cell of
	* age x dfemale x single_dummy x edu_mid x edu_high
	egen `cellmed' = median(`var'), by(age dfemale single_dummy edu_mid edu_high) 
	replace `var'_ms = `cellmed' if (`cellmed' < . & `var'_ms == .)

	* Step 2: if the cell has no reported value at all, fall back on
	* the overall median of var
	egen `allmed' = median(`var')
	replace `var'_ms = `allmed' if (`allmed' < . & `var'_ms == .)

	drop `cellmed' `allmed'
}

* Compare reported and imputed values, overall and split by the missing-data dummy
foreach var in logfinass logfinasshh {
	summ `var' `var'_ms `var'_md
	summ `var' `var'_ms if `var'_md == 1
	summ `var' `var'_ms if `var'_md == 0
}


* Generate imputed financial assets, from the log-version:
* finass_ms inverts log(1 + x) on the imputed log variable.
* finass_md flags a missing reported amount. missing() is used instead of
* "== ." so that extended missing values (.a-.z) are flagged too, in line
* with the ">= ." test behind logfinass_md.
*
gen finass_ms = exp(logfinass_ms)-1
label variable finass_ms "Financial assets imputed (using logfinass_ms)"
gen finass_md = missing(finass)
label variable finass_md "Dummy missing financial assets amount"
* Cross-check against the log-based dummy: the two differ only where finass is
* reported but its log is missing (1 + finass not positive)
tab finass_md logfinass_md
summ finass finass_ms finass_md 
summ finass finass_ms finass_md if finass_md == 0 
summ finass finass_ms finass_md if finass_md == 1


* Generate imputed household financial assets, from the log-version
* (same construction as above, for the household amounts)
*
gen finasshh_ms = exp(logfinasshh_ms)-1
label variable finasshh_ms "HH financial assets imputed (using logfinasshh_ms)"
gen finasshh_md = missing(finasshh)
label variable finasshh_md "Dummy missing hh financial assets amount"
tab finasshh_md logfinasshh_md
summ finasshh finasshh_ms finasshh_md logfinasshh_md 
summ finasshh finasshh_ms finasshh_md if finasshh_md == 0 
summ finasshh finasshh_ms finasshh_md if finasshh_md == 1


************************************************************************
**** Save the main dataset for the results in the article
************************************************************************

* Write the data in memory to the main data file (name held in the
* global fn_main), overwriting the earlier version
save ${fn_main}, replace


************************************************************************
**** Reshape the Ambiguity Dataset to panel format             
************************************************************************

* Version 2-4, 2 December 2023
* 
* This code reshapes the Ambiguity Dataset to panel format,
* so that we can estimate random effects models

* List of variables to keep in the panel dataset.
* We do not keep everything, as otherwise the panel dataset will become big.
* The list is split into groups of related names purely for readability;
* every variable is listed exactly once (104 variables in total), and the
* order of the variables in the dataset is not affected by the order here.
local k_id     nohhold nomem noid beleg duur dge10min
local k_eva    eva2t1 eva2t2 eva2t3 eva2t4 eva2t5
local k_demo   age dfemale single_dummy aantalki logchildren oplcat ///
               occ_regular occ_buss occ_retired occ_unemp occ_empl
local k_wealth nettohh lognettohh finasshh_ms logfinasshh_ms logfinasshh_md
local k_know   FL_Total FL_Basic FL_Advanced dknow_stock dknow_msci dknow_bitcoin know_score
local k_inv    dinv_indep dinv_advisor dinv_managed dinvest yearsinvest InvS ///
               dstock_now dcrypto_now dmsci_now ///
               dstocks dmutualfunds dbonds dequity wstocks wequity wthousing wtfinass
local k_risk   b_risk dpos_b_risk a_risk dpos_a_risk ///
               prem_risk1 prem_risk2 prem_risk3 prem_risk4 ///
               payoff_risk1 payoff_risk2 payoff_risk3 payoff_risk4 ///
               avg_riskprem avg_riskprem12 nir_risk dir_risk ///
               ir_risk1 ir_risk2 ir_risk3 ir_risk4
local k_ambig  ms_aex mc_aex a_aex b_aex ms_stock mc_stock a_stock b_stock ///
               ms_msci mc_msci a_msci b_msci ms_bitcoin mc_bitcoin a_bitcoin b_bitcoin ///
               b_avg a_avg
local k_check  dsamerow_aex dsamerow_stock dsamerow_msci dsamerow_bitcoin ///
               nir_aex nir_stock nir_msci nir_bitcoin nir_tot ///
               nv_aex nv_stock nv_msci nv_bitcoin nv_tot ns_samerow

keep `k_id' `k_eva' `k_demo' `k_wealth' `k_know' `k_inv' `k_risk' `k_ambig' `k_check'

* The variable invid keeps track of the respondent's id number:
* it is the row number of the respondent in the current dataset.
*
gen invid = _n

* Create the numbered copies that "reshape long" needs. For the source-specific
* variables the number codes the source: 1 = aex, 2 = stock, 3 = msci and 4 = bitcoin.
*   b1-b4               : b-index for the 4 sources
*   a1-a4               : a-index for the 4 sources
*   dsamerow1-dsamerow4 : dummies for always switching on the same row of the
*                         6 choice lists for the source
*   nir1-nir4           : number of irrational (=incorrect) responses on the
*                         6 choice lists for the source
*   nv1-nv4             : number of monotonicity violations (ms > mc) on the
*                         6 choice lists for the source
* The outer loop runs over the variable groups and the inner loop over the
* sources, so the variables are created in the order b1-b4, a1-a4, ...
*
local sources aex stock msci bitcoin
foreach stub in b a dsamerow nir nv {
	forvalues j = 1/4 {
		local src : word `j' of `sources'
		gen `stub'`j' = `stub'_`src'
	}
}

* For the risk questions the number codes the question:
* 1 = 50% chance, 2 = 33%, 3 = 17% and 4 = 83%
*   riskprem1-riskprem4 : the risk premiums for the 4 risk questions
*   rprob1-rprob4       : the probability of winning for the 4 risk questions
*   riskdir1-riskdir4   : dummies for irrational (incorrect) responses on the
*                         4 risk questions
*
forvalues j = 1/4 {
	gen riskprem`j' = prem_risk`j'
}

local probs 0.50 0.33 0.17 0.83
forvalues j = 1/4 {
	local p : word `j' of `probs'
	gen rprob`j' = `p'
}

forvalues j = 1/4 {
	gen riskdir`j' = ir_risk`j'
}


*** RESHAPE DATASET TO PANEL FORMAT ***

* Go from one row per respondent to four rows per respondent.
* "invid" is the respondent id number and the "year" index (1, 2, 3, 4)
* numbers the four rows:
*  - for the ambiguity variables it keeps track of the 4 sources
*    (1 = aex, 2 = stock, 3 = msci and 4 = bitcoin);
*  - for the risk variables it keeps track of the 4 risk questions
*    (1 = 50%, 2 = 33%, 3 = 17% and 4 = 83% chance of winning).
*
reshape long b a dsamerow nir nv riskprem rprob riskdir, i(invid) j(year)


* Labels for the two panel indexes: respondent (invid) and year
*
label variable invid "Individual respondent number (1-571), cross-section dimension"
label variable year  "Source number (1-4), 'time' dimension of the panel"

* Labels for the source-specific panel variables
label variable b        "Index b (panel, 1=aex, 2=stock, 3=msci, 4=bitcoin)"
label variable a        "Index a (panel, 1=aex, 2=stock, 3=msci, 4=bitcoin)"
label variable dsamerow "Dummy for same-row switching on all 6 choice list for the source"
label variable nir      "Number of irrational/dominated responses per source (out of 6)"
label variable nv       "Number of monotonicity violations per source (out of 6)"

* Labels for the risk-question panel variables
label variable riskprem "Premium for risk questions (panel, 1=50%, 2=33%, 3=17%, 4=83%)"
label variable rprob    "Chance of winning for risk questions (panel, 1=50%, 2=33%, 3=17%, 4=83%)"
label variable riskdir  "Dummy for irrational/incorrect responses on the risk questions (panel)"


* 'source' (= 1, 2, ..., 4) is a copy of the year index under a more
* descriptive name; it is placed directly after year in the dataset.
*
gen source = year
label variable source "Investment source (1=aex, 2=stock, 3=msci, 4=bitcoin)"
order source, after(year)

* Generate 4 dummies for the sources: dum_source1 - dum_source4
tabulate source, gen(dum_source)

* Names of the sources, in the order of their source number
local srcnames aex stock msci bitcoin

forvalues k = 1/4 {
	local nm : word `k' of `srcnames'
	label variable dum_source`k' "Dummy `nm' (source = `k')"
}

* The same 4 dummies under easier names: d_aex, d_stock, d_msci, d_bitcoin
forvalues k = 1/4 {
	local nm : word `k' of `srcnames'
	gen d_`nm' = dum_source`k'
	label variable d_`nm' "Dummy `nm' (source = `k')"
}

* Put the numbered dummies directly after the source index
order dum_source1 dum_source2 dum_source3 dum_source4, after(source)

* Generate a panel variable keeping track of asset ownership: dinv
* dinv = na          for source == 1 (AEX; information not available, nobody owns it)
* dinv = dstock_now  for source == 2 (familiar stock)
* dinv = dmsci_now   for source == 3 (MSCI World)
* dinv = dcrypto_now for source == 4 (Bitcoin and other crypto-currencies)
*
* dinv starts out missing on every row and is filled in only where the
* ownership dummy of the row's source equals 0 or 1; any other value of
* that dummy (including missing) leaves dinv missing.
*
gen dinv = .
replace dinv = dstock_now  if source == 2 & inlist(dstock_now, 0, 1)
replace dinv = dmsci_now   if source == 3 & inlist(dmsci_now, 0, 1)
replace dinv = dcrypto_now if source == 4 & inlist(dcrypto_now, 0, 1)
label variable dinv "Asset ownership dummy (1=na/missing, 2=stock, 3=msci, 4=bitcoin)"

* Checks: dinv should have no observations for source 1, and should
* reproduce the ownership dummy of the source on the other rows
summ dinv
summ dinv if source == 1
tab dinv dstock_now if source == 2
tab dinv dmsci_now if source == 3
tab dinv dcrypto_now if source == 4

* Declare the panel structure for panel regressions: 
* invid is the cross-sectional unit, and source takes the place of the time dimension
xtset invid source


************************************************************************
**** Save the panel dataset used for the results in the article
************************************************************************

* Write the panel dataset to the file named in the global fn_panel,
* overwriting the earlier version
*
save ${fn_panel}, replace



